Spark DataFrame 基本操作
创建DataFrame
1 | val df=spark.read.format("json").load("/data/flight-data/json/2015-summary.json") |
1 | val myManualSchema = new StructType( |
1 | val myDF=Seq(("Hello",2,1L)).toDF("col1","col2","col3") |
Select
1 | df.select("col1","col2").show(2) |
1 | df.select( |
1 | df.selectExpr( |
字面量
1 | df.select(expr("*"),lit(1).as("One")).show(2) |
添加列
1 | df.withColumn("numberOne",lit(1)).show(2) |
1 | df.withColumn("withinCountry",expr("col1==col2")).show(2) |
重命名列
1 | df.withColumnRenamed("old","new").columns |
删除列
1 | df.drop("col1").columns |
过滤行
1 | df.filter(col("count")<2).show(2) |
行排序
1 | df.orderBy("count","DEST_COUNTRY_NAME").show() |